################################################
######### DATA PROCESSING ######################

# you should just use zeligs setx command for dataprocessing unless you are using ZINB
XZout_Function <- function(data, count.vars, zero.vars,vary){
  # data is the data you use, already collapsed by meaning or medianing /whatever to a vector
  # count.vars is the name of the variables for the count
  # zero.vars is the name of the variables for the systematic zero component. Put no name if you are doing NB.
  # vary is the parameter(s) you vary from zero to 1 (string)
  
  x.out <- cbind(1,t(data[count.vars]))[rep(1,length(seq(from = 0, to = 1, by = 0.1))),] 
  z.out <- cbind(1,t(data[zero.vars]))[rep(1,length(seq(from = 0, to = 1, by = 0.1))),]
  x.out[,vary] <- seq(from=0,to=1,by=0.1)
  z.out[,vary] <- seq(from=0,to=1,by=0.1)
  output <-list()
  output$x.out <- x.out
  output$z.out <- z.out
  return(output) # reference x.out (ZINB and NB) and z.out (ZINB)
}


##########################################################
######### ZINB EXPECTATION AND PREDICTION FUNCTION #######

ExpPred_Function.ZINB <- function(object, x.out,z.out, par.simN, predict.simN){
  # object = model object
  # x.out = the matrix with which we will calculate the count. 
      # Must be in same order as count parameters
  # z.out = the matrix with which we will calculate the structural zeros. 
      # Must be in same order as zero parameters
  # par.simN is the N of the number of parameter simulations we will draw
  # predict.simN is the N of the number of simulations we will use for predicted values
  
  par <- object$optim$par
  par.varcov <- -solve(object$optim$hess) # set variance covariance matrixs
  # simulated parameters
  sim.par <- rmvnorm(n = par.simN, par, par.varcov)
  
  # simulated predicted values
  sim.predict <- rzinbinom(predict.simN*dim(x.out)[1], 
                         mu=exp(sim.par[,1:ncol(x.out)]%*% t(x.out)), 
                         size=1/exp(sim.par[,ncol(sim.par)]), 
                          zprob=(1/(1+exp(sim.par[,(ncol(x.out)+1):(ncol(x.out)+
                         ncol(z.out))]%*% t(-z.out) ))))
  
  # get expected values distribution
  mu.expect <- exp(sim.par[,1:ncol(x.out)]%*% t(x.out))
  psi.expect <- (1/(1+exp(sim.par[,(ncol(x.out)+1):(ncol(x.out)+
                         ncol(z.out))]%*% t(-z.out) )))
  sim.expect <- (1-psi.expect)*mu.expect
  
  output <- list()
  output$expect <- sim.expect
  output$predict <- sim.predict
  return(output)
}

############################################################################
#########ZINB EXPECTATION AND PREDICTION FUNCTION: 1*Log(Population) #######
# this function is specifically designed to deal with an offset setting
# the coefficient on logpopulation to 1.

ExpPred_Function.ZINB.log <- function(object, x.out, z.out, logpop, par.simN, predict.simN){
  # object = ZINB model object
  # x.out = the matrix with which we will calculate the count. 
  # Must be in same order as count parameters
  # z.out = the matrix with which we will calculate the structural zeros. 
  # Must be in same order as zero parameters
  # par.simN is the N of the number of parameter simulations we will draw
  # predict.simN is the N of the number of simulations we will use for predicted values
  # logpop is the value for log(population) in a model where the coefficient for log(population) is set to 1
  
  par <- object$optim$par
  par.varcov <- -solve(object$optim$hess) # set variance covariance matrixs
  # simulated parameters
  sim.par <- rmvnorm(n = par.simN, par, par.varcov)
  
  # simulated predicted values
  sim.predict <- rzinbinom(predict.simN*dim(x.out)[1], 
                           mu=exp(sim.par[,1:ncol(x.out)]%*% t(x.out)+logpop), 
                           size=1/exp(sim.par[,ncol(sim.par)]), 
                           zprob=(1/(1+exp(sim.par[,(ncol(x.out)+1):(ncol(x.out)+
                             ncol(z.out))]%*% t(-z.out) ))))
  
  # get expected values distribution
  mu.expect <- exp(sim.par[,1:ncol(x.out)]%*% t(x.out)+logpop)
  psi.expect <- (1/(1+exp(sim.par[,(ncol(x.out)+1):(ncol(x.out)+
    ncol(z.out))]%*% t(-z.out) )))
  sim.expect <- (1-psi.expect)*mu.expect
  
  output <- list()
  output$expect <- sim.expect
  output$predict <- sim.predict
  return(output)
}


###################################################################
######### FIRST DIFFERENCES: ZINB #################################

par.simN <- 10^4
ExpPred_Function.ZINB.fd <- function(object, x.out0,z.out0,x.out1,z.out1, par.simN){
  # par = parameters from a ZINB model (all parameters including theta)
  # hess = hessian from a ZINB model (all parameters including theta)
  # x.out0 = the matrix with which we will calculate the count for the original data
      # Must be in same order as count parameters
  # z.out0 = the matrix with which we will calculate the structural zeros for the original data
      # Must be in same order as zero parameters
  # x.out1 and z.out1, the first differences matrix that we move to
  # par.simN is the N of the number of parameter simulations we will draw
 
  
  par <- object$optim$par
  par.varcov <- -solve(object$optim$hess) # set variance covariance matrixs
  # simulated parameters
  sim.par <- rmvnorm(n = par.simN, par, par.varcov)
  
  # get expected values distribution
  mu.expect0 <- exp(sim.par[,1:length(x.out0)] %*% x.out0)
  psi.expect0 <- (1/(1+exp(sim.par[,(length(x.out0)+1):(length(x.out0)+
                         length(z.out0))]%*% -z.out0)))
  mu.expect1 <- exp(sim.par[,1:length(x.out1)] %*% x.out1)
  psi.expect1 <- (1/(1+exp(sim.par[,(length(x.out1)+1):(length(x.out1)+
                         length(z.out1))]%*% -z.out1)))
  sim.fd <- (1-psi.expect1)*mu.expect1 - (1-psi.expect0)*mu.expect0
  
  return(sim.fd)
}


######################################################
######## NB PREDICTION AND EXPECTION FUNCTION #######

ExpPred_Function.NB <- function(model, x.out, par.simN, predict.simN){
  # this function is a temporary fix until I figure out how to get out the hessian.
  # for the moment, this function assumes that the covariance 
  # model = the NB model. 
  # x.out = the matrix with which we will calculate the count. 
  # par.simN is the N of the number of parameter simulations we will draw
  # predict.simN is the N of the number of simulations we will use for predicted values
  
  par <- c(model$coeff, model$theta)
  par.vcov <- rbind(cbind(vcov(model),0),0) # set variance covariance matrixs
  par.vcov[dim(par.vcov)[1],dim(par.vcov)[2]] <- 0 #model$SE.theta^2
  
  # simulated parameters
  sim.par <- rmvnorm(n = par.simN, par, par.vcov)
  x.out.s <- split(x.out,c(1:nrow(x.out))) #split x.out by rows
  # apply rbinom to each level of competition
  sim.predict.s <- lapply(x.out.s, function(x) rnbinom(predict.simN, mu=exp(sim.par[,1:ncol(x.out)]%*% t(x)),
                                                       size=sim.par[,dim(sim.par)[2]]))
  # unsplit into a matrix by level of competition on top, sims on bottom
  sim.predict <- matrix(unlist(sim.predict.s),ncol=nrow(x.out))
  
  # get expected values distribution
  sim.expect <- exp(sim.par[,1:ncol(x.out)]%*% t(x.out))
  
  output <- list()
  output$expect <- sim.expect
  output$predict <- sim.predict
  return(output)
}



#######################################################################
######## NB PREDICTION AND EXPECTION FUNCTION: 1*log(population) #######
# this function calculates expected and predicted values
# for an NB model with log population with a coefficient offset to 1.



ExpPred_Function.NB.log <- function(model, x.out, logpop, par.simN, predict.simN){
  # this function is a temporary fix until I figure out how to get out the hessian.
  # for the moment, this function assumes that the covariance 
  # model = the NB model. 
  # x.out = the matrix with which we will calculate the count. 
  # par.simN is the N of the number of parameter simulations we will draw
  # predict.simN is the N of the number of simulations we will use for predicted values
  
  par <- c(model$coeff, model$theta)
  par.vcov <- rbind(cbind(vcov(model),0),0) # set variance covariance matrixs
  par.vcov[dim(par.vcov)[1],dim(par.vcov)[2]] <- 0 #model$SE.theta^2
  
  # simulated parameters
  sim.par <- rmvnorm(n = par.simN, par, par.vcov)
  x.out.s <- split(x.out,c(1:nrow(x.out))) #split x.out by rows
  # apply rbinom to each level of competition
  sim.predict.s <- lapply(x.out.s, function(x) rnbinom(predict.simN, mu=exp(sim.par[,1:ncol(x.out)]%*% t(x)+logpop),
                                                       size=sim.par[,dim(sim.par)[2]]))
  # unsplit into a matrix by level of competition on top, sims on bottom
  sim.predict <- matrix(unlist(sim.predict.s),ncol=nrow(x.out))
  
  # get expected values distribution
  sim.expect <- exp(sim.par[,1:ncol(x.out)]%*% t(x.out)+logpop)
  
  output <- list()
  output$expect <- sim.expect
  output$predict <- sim.predict
  return(output)
}




#############################################################################################
############# FIRST DIFFERENCES: NB WITH 1*LOGPOPULATION #################################
# this function calculates first differences for an NB model with log population with a coefficient offset to 1.


ExpPred_Function.NB.p2.log.fd <- function(model, x.out1, x.out0, logpop, par.simN){
  # this function is a temporary fix until I figure out how to get out the hessian.
  # for the moment, this function assumes that the covariance 
  # model = the NB model. 
  # x.out0 = the matrix with which we will calculate the count for mu1-mu0
  # x.out1 = same for mu1-mu0
  # par.simN is the N of the number of parameter simulations we will draw
  # predict.simN is the N of the number of simulations we will use for predicted values
  
  par <- c(model$coeff, model$theta)
  par.vcov <- rbind(cbind(vcov(model),0),0) # set variance covariance matrixs
  par.vcov[dim(par.vcov)[1],dim(par.vcov)[2]] <- 0 #model$SE.theta^2, as is commonly done (not ideal, 
                                #but the same procedure as in Zelig)
  
  # simulated parameters
  sim.par <- rmvnorm(n = par.simN, par, par.vcov)

  # get expected values distribution
  sim.fd <- exp(sim.par[,1:ncol(x.out1)]%*% t(x.out1)+logpop) - 
          exp(sim.par[,1:ncol(x.out0)]%*% t(x.out0)+logpop)

  return(sim.fd)
}